import os
import os.path
from urllib.parse import urljoin
from urllib.request import build_opener, install_opener, urlretrieve

import requests
from bs4 import BeautifulSoup


# Browser-like request headers sent with every HTTP call so the audit
# site treats us as a normal visitor. The Cookie value was captured from
# a real browser session and may expire — refresh it if downloads 403.
headers = {
    'User-Agent': r'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
    'Cookie': r'zh_choose=n; Hm_lvt_45a5217289afd211a052afdd653f87c9=1711949500; _trs_uv=lugij41w_5340_6x7e; _trs_ua_s_1=lugij41w_5340_onx; Hm_lvt_8e27732e26e78ee7975a6f697a0d3bbf=1711949500; arialoadData=false; Hm_lpvt_8e27732e26e78ee7975a6f697a0d3bbf=1711950798; Hm_lpvt_45a5217289afd211a052afdd653f87c9=1711950798',
}


def url_i(i):
    """Return the URL of the i-th listing page (1-indexed).

    The first page has no numeric suffix; pages 2+ use ``index_{i}.html``.
    """
    base = 'http://audit.sz.gov.cn/zxbs/sjgzbg/bjyszxbg'
    return f'{base}/index.html' if i == 1 else f'{base}/index_{i}.html'


# Ensure the download target directory exists. exist_ok=True avoids the
# check-then-create race of a separate os.path.exists() guard.
os.makedirs('pdf', exist_ok=True)


# Install a urllib opener carrying the browser headers ONCE, up front:
# urlretrieve below uses the globally-installed opener, and the site
# refuses downloads without the Cookie/User-Agent. The original rebuilt
# and reinstalled this opener for every single attachment.
opener = build_opener()
opener.addheaders = list(headers.items())
install_opener(opener)

# Crawl listing pages 1..4, follow each article link, and download the
# attachments found on the article page into ./pdf.
for i in range(1, 5):
    # Pass the same headers to requests too — the original sent these
    # page requests anonymously even though the header dict existed.
    # timeout keeps the script from hanging forever on a stalled server.
    req = requests.get(url_i(i), headers=headers, timeout=30)
    soup = BeautifulSoup(req.content, features='lxml')
    lis = soup.select('.right_list li')
    for li in lis:
        link = li.select('a')[0]['href']
        req2 = requests.get(link, headers=headers, timeout=30)
        soup2 = BeautifulSoup(req2.content, features='lxml')
        title = soup2.select('.tit h1')[0].text
        page = soup2.select('.news_cont_d_wrap')[0].text
        attachments = soup2.select('.x866 dd a')
        for attach in attachments:
            # Resolve possibly-relative attachment URLs against the
            # article page; urljoin is a no-op for absolute URLs.
            href = urljoin(link, attach['href'])
            # NOTE(review): the link text is used verbatim as the file
            # name — it may lack an extension or contain characters
            # invalid on some filesystems; confirm against real pages.
            title_attach = attach.text.strip()
            print(f'downloading {href} as {title_attach}')
            urlretrieve(href, os.path.join('pdf', title_attach))
        print(link)
        print(title)
        print(page)
        # NOTE(review): only the first article of each listing page is
        # processed — looks like a debugging leftover; remove this break
        # to crawl every article. Kept to preserve current behavior.
        break
